# Load the built-in iris data set and show its first six rows.
data("iris")
head(iris, n = 6L)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
# Same six-row preview, this time sent through an explicit print() call.
print(head(iris, n = 6L))
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(rio)
## Warning: package 'rio' was built under R version 4.2.1
library(ggplot2)
library(plotly)
## Warning: package 'plotly' was built under R version 4.2.1
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:rio':
## 
##     export
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Read the salary records from the CSV file with rio and preview the top rows.
# NOTE(review): the object name `data` shadows the base `data()` function name;
# calls to data() still resolve to the function, but a more specific name
# would be clearer.
data <- rio::import("Salaries.csv")
head(data, n = 6L)
##        rank discipline yrs.since.phd yrs.service  sex salary
## 1      Prof          B            19          18 Male 139750
## 2      Prof          B            20          16 Male 173200
## 3  AsstProf          B             4           3 Male  79750
## 4      Prof          B            45          39 Male 115000
## 5      Prof          B            40          41 Male 141500
## 6 AssocProf          B             6           6 Male  97000
# Show the last six records of the imported table.
tail(data, n = 6L)
##         rank discipline yrs.since.phd yrs.service  sex salary
## 392     Prof          A            30          19 Male 151292
## 393     Prof          A            33          30 Male 103106
## 394     Prof          A            31          19 Male 150564
## 395     Prof          A            42          25 Male 101738
## 396     Prof          A            25          15 Male  95329
## 397 AsstProf          A             8           4 Male  81035
# Report the table size as (rows, columns).
c(nrow(data), ncol(data))
## [1] 397   6
# List the column names of the imported table.
colnames(data)
## [1] "rank"          "discipline"    "yrs.since.phd" "yrs.service"  
## [5] "sex"           "salary"
# Spreadsheet export is left disabled. plotly was attached after rio and masks
# `export`, so re-enabling this line needs the explicit rio:: prefix.
# rio::export(data, "data.xlsx")
# Write the table to a text file using write.table()'s default settings.
write.table(data, file = "data.txt")

# Open the help page for sd() (starts the local help server when interactive).
help("sd")
## starting httpd help server ...
##  done
# Display the starwars tibble that ships with dplyr.
print(starwars)
## # A tibble: 87 × 14
##    name        height  mass hair_…¹ skin_…² eye_c…³ birth…⁴ sex   gender homew…⁵
##    <chr>        <int> <dbl> <chr>   <chr>   <chr>     <dbl> <chr> <chr>  <chr>  
##  1 Luke Skywa…    172    77 blond   fair    blue       19   male  mascu… Tatooi…
##  2 C-3PO          167    75 <NA>    gold    yellow    112   none  mascu… Tatooi…
##  3 R2-D2           96    32 <NA>    white,… red        33   none  mascu… Naboo  
##  4 Darth Vader    202   136 none    white   yellow     41.9 male  mascu… Tatooi…
##  5 Leia Organa    150    49 brown   light   brown      19   fema… femin… Aldera…
##  6 Owen Lars      178   120 brown,… light   blue       52   male  mascu… Tatooi…
##  7 Beru White…    165    75 brown   light   blue       47   fema… femin… Tatooi…
##  8 R5-D4           97    32 <NA>    white,… red        NA   none  mascu… Tatooi…
##  9 Biggs Dark…    183    84 black   light   brown      24   male  mascu… Tatooi…
## 10 Obi-Wan Ke…    182    77 auburn… fair    blue-g…    57   male  mascu… Stewjon
## # … with 77 more rows, 4 more variables: species <chr>, films <list>,
## #   vehicles <list>, starships <list>, and abbreviated variable names
## #   ¹​hair_color, ²​skin_color, ³​eye_color, ⁴​birth_year, ⁵​homeworld
## # ℹ Use `print(n = ...)` to see more rows, and `colnames()` to see all variable names
# List the data sets available in the attached packages. The call is
# namespaced because `data` was also bound to a data frame earlier on.
utils::data()
library(dplyr)
# Keep only the name, height and gender columns of starwars.
newdata <- starwars %>%
  select(name, height, gender)
print(newdata)
## # A tibble: 87 × 3
##    name               height gender   
##    <chr>               <int> <chr>    
##  1 Luke Skywalker        172 masculine
##  2 C-3PO                 167 masculine
##  3 R2-D2                  96 masculine
##  4 Darth Vader           202 masculine
##  5 Leia Organa           150 feminine 
##  6 Owen Lars             178 masculine
##  7 Beru Whitesun lars    165 feminine 
##  8 R5-D4                  97 masculine
##  9 Biggs Darklighter     183 masculine
## 10 Obi-Wan Kenobi        182 masculine
## # … with 77 more rows
## # ℹ Use `print(n = ...)` to see more rows
# Keep only female characters whose homeworld is Ojom.
# The value "female" belongs to the `sex` column; `gender` is coded
# "masculine"/"feminine" (see the select() output above), so the previous
# `gender == "female"` condition could never match any row.
# NOTE(review): re-run to confirm the printed result below is still current.
newdata <- filter(starwars,
                  sex == "female" &
                    homeworld == "Ojom")
newdata
## # A tibble: 0 × 14
## # … with 14 variables: name <chr>, height <int>, mass <dbl>, hair_color <chr>,
## #   skin_color <chr>, eye_color <chr>, birth_year <dbl>, sex <chr>,
## #   gender <chr>, homeworld <chr>, species <chr>, films <list>,
## #   vehicles <list>, starships <list>
## # ℹ Use `colnames()` to see all variable names
# Keep characters whose homeworld is one of three named planets;
# %in% tests membership in the set.
newdata <- starwars %>%
  filter(homeworld %in% c("Alderaan", "Coruscant", "Endor"))

print(newdata)
## # A tibble: 7 × 14
##   name         height  mass hair_…¹ skin_…² eye_c…³ birth…⁴ sex   gender homew…⁵
##   <chr>         <int> <dbl> <chr>   <chr>   <chr>     <dbl> <chr> <chr>  <chr>  
## 1 Leia Organa     150    49 brown   light   brown        19 fema… femin… Aldera…
## 2 Wicket Syst…     88    20 brown   brown   brown         8 male  mascu… Endor  
## 3 Finis Valor…    170    NA blond   fair    blue         91 male  mascu… Corusc…
## 4 Adi Gallia      184    50 none    dark    blue         NA fema… femin… Corusc…
## 5 Bail Presto…    191    NA black   tan     brown        67 male  mascu… Aldera…
## 6 Jocasta Nu      167    NA white   fair    blue         NA fema… femin… Corusc…
## 7 Raymus Anti…    188    79 brown   light   brown        NA male  mascu… Aldera…
## # … with 4 more variables: species <chr>, films <list>, vehicles <list>,
## #   starships <list>, and abbreviated variable names ¹​hair_color, ²​skin_color,
## #   ³​eye_color, ⁴​birth_year, ⁵​homeworld
## # ℹ Use `colnames()` to see all variable names
# Blank out implausible heights: values below 75 or above 200 become NA,
# every other value (including heights that are already NA) is left as is.
newdata <- starwars %>%
  mutate(
    height = replace(height, which(height < 75 | height > 200), NA)
  )
print(newdata)
## # A tibble: 87 × 14
##    name        height  mass hair_…¹ skin_…² eye_c…³ birth…⁴ sex   gender homew…⁵
##    <chr>        <int> <dbl> <chr>   <chr>   <chr>     <dbl> <chr> <chr>  <chr>  
##  1 Luke Skywa…    172    77 blond   fair    blue       19   male  mascu… Tatooi…
##  2 C-3PO          167    75 <NA>    gold    yellow    112   none  mascu… Tatooi…
##  3 R2-D2           96    32 <NA>    white,… red        33   none  mascu… Naboo  
##  4 Darth Vader     NA   136 none    white   yellow     41.9 male  mascu… Tatooi…
##  5 Leia Organa    150    49 brown   light   brown      19   fema… femin… Aldera…
##  6 Owen Lars      178   120 brown,… light   blue       52   male  mascu… Tatooi…
##  7 Beru White…    165    75 brown   light   blue       47   fema… femin… Tatooi…
##  8 R5-D4           97    32 <NA>    white,… red        NA   none  mascu… Tatooi…
##  9 Biggs Dark…    183    84 black   light   brown      24   male  mascu… Tatooi…
## 10 Obi-Wan Ke…    182    77 auburn… fair    blue-g…    57   male  mascu… Stewjon
## # … with 77 more rows, 4 more variables: species <chr>, films <list>,
## #   vehicles <list>, starships <list>, and abbreviated variable names
## #   ¹​hair_color, ²​skin_color, ³​eye_color, ⁴​birth_year, ⁵​homeworld
## # ℹ Use `print(n = ...)` to see more rows, and `colnames()` to see all variable names
# Bring the msleep data set from ggplot2 into the workspace.
data("msleep", package = "ggplot2")

# Proportion of missing values per variable: count the NAs in each column
# and divide by the column length (i.e. the number of rows).
pctmiss <- vapply(
  msleep,
  function(column) sum(is.na(column)) / length(column),
  numeric(1)
)
round(pctmiss, digits = 2)
##         name        genus         vore        order conservation  sleep_total 
##         0.00         0.00         0.08         0.00         0.35         0.00 
##    sleep_rem  sleep_cycle        awake      brainwt       bodywt 
##         0.27         0.61         0.00         0.33         0.00
library(ggplot2)
library(mosaicData)
## Warning: package 'mosaicData' was built under R version 4.2.1
# Load the Marriage data set (presumably from mosaicData, attached just above)
# and draw a bar chart of record counts per race, using base palette
# colours 1 to 4.
data("Marriage")
barplot(table(Marriage[["race"]]), col = 1:4)

# Scatter plot of highway mileage against engine displacement for the mpg
# data, with points coloured by car class, readable axis/legend titles and
# the black-and-white theme.
p <- ggplot(
  data = mpg,
  mapping = aes(x = displ, y = hwy, color = class)
) +
  geom_point(size = 3) +
  labs(
    x = "Engine displacement",
    y = "Highway Mileage",
    color = "Car Class"
  ) +
  theme_bw()

# Convert the static ggplot object into an interactive plotly widget.
ggplotly(p)